package simplecrawler;

import java.io.*;
import java.net.*;
import java.util.Locale;
import java.util.Vector;

import threads.*;

/**
 * Crawler thread that handles one URL at a time: media files (recognised by
 * their file-name suffix) are saved to disk, text pages are scanned for links
 * which are pushed onto the queue, and everything else is skipped.
 */
public class PSuckerThread extends ControllableThread {

    /** Lower-case file-name suffixes of the media files that get saved. */
    private static final String[] MEDIA_SUFFIXES = {
        ".jpg", ".jpeg", ".mpeg", ".mpg", ".avi", ".wmv"
    };

    /**
     * Processes a single queue entry.
     *
     * <p>Failures are reported on standard output and never propagate to the
     * caller, so one bad URL does not stop the thread.
     *
     * @param o the entry to process; it is cast to {@link URL}
     */
    public void process(Object o) {
        try {
            URL pageURL = (URL) o;

            // Locale.ROOT keeps the suffix test independent of the default
            // locale (e.g. a Turkish locale lower-cases "I" to a dotless "i",
            // so ".AVI" would never match ".avi").
            String filename = pageURL.getFile().toLowerCase(Locale.ROOT);

            if (hasMediaSuffix(filename)) {
                saveMedia(pageURL, filename);
                return;
            }

            // If it's neither a media file nor some text, it's not interesting.
            // getContentType() returns null when the type is unknown, and
            // media types are case-insensitive, hence the null check and the
            // lower-casing.
            String mimetype = fetchContentType(pageURL);
            if (mimetype == null
                    || !mimetype.toLowerCase(Locale.ROOT).startsWith("text")) {
                return;
            }

            String rawPage = SaveURL.getURL(pageURL);
            // Lower-cased, whitespace-normalised copy of the page that is
            // handed to the link extractor together with the raw text.
            String smallPage = rawPage.toLowerCase(Locale.ROOT).replaceAll("\\s", " ");
            // Treat the page as an HTML file and try to extract links.
            Vector links = SaveURL.extractLinks(rawPage, smallPage, initURL);
            // Convert each link text to a URL and enqueue it.
            for (int n = 0; n < links.size(); n++) {
                try {
                    // Links might be relative to the current page.
                    URL link = new URL(pageURL,
                            (String) links.elementAt(n));
                    // If layers are not used, write everything into same layer
                    if (tc.getMaxLevel() == -1) {
                        queue.push(link, level);
                    } else {
                        queue.push(link, level + 1);
                    }
                } catch (MalformedURLException ignored) {
                    // Ignore malformed URLs, the link extractor might
                    // have failed.
                }
            }
        } catch (Exception e) {
            // Processing of this object has failed; report it and carry on
            // so that the thread can continue with the next entry.
            System.out.println("Processing of " + o + " failed due to a " + e);
        }
    }

    /** Returns true if the (lower-case) file name ends with a known media suffix. */
    private static boolean hasMediaSuffix(String filename) {
        for (String suffix : MEDIA_SUFFIXES) {
            if (filename.endsWith(suffix)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Saves the media file behind the URL under a flat name built from the
     * queue's filename prefix, the host and the URL's file part.
     */
    private void saveMedia(URL pageURL, String filename) {
        // Both separator characters are replaced so that a crawled URL can
        // never introduce a directory component into the target name.
        String flatName = filename.replace('/', '-').replace('\\', '-');
        String target = ((URLQueue) queue).getFilenamePrefix()
                + pageURL.getHost() + flatName;
        System.out.println("Saving to file " + target);
        try {
            SaveURL.writeURLtoFile(pageURL, target);
        } catch (Exception e) {
            System.out.println("Saving to file " + target + " from URL " + pageURL.toString() + " failed due to a " + e.toString());
        }
    }

    /**
     * Reads the content type reported for the URL, or null if it is unknown.
     * An HTTP connection opened for this purpose is disconnected afterwards
     * instead of being left open.
     */
    private static String fetchContentType(URL pageURL) throws IOException {
        URLConnection connection = pageURL.openConnection();
        try {
            return connection.getContentType();
        } finally {
            if (connection instanceof HttpURLConnection) {
                ((HttpURLConnection) connection).disconnect();
            }
        }
    }
}
